{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "car evaluation数据集描述：该数据集共1728个样本，有6个特征和一个标签值。我们的目标是对比不同的分类器，选出一个分类效果最好的分类器。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns;sns.set()\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: read_table is deprecated, use read_csv instead.\n",
      "  This is separate from the ipykernel package so we can avoid doing imports until\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>buying</th>\n",
       "      <th>maint</th>\n",
       "      <th>doors</th>\n",
       "      <th>persons</th>\n",
       "      <th>lug_boot</th>\n",
       "      <th>safety</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>small</td>\n",
       "      <td>high</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>low</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>vhigh</td>\n",
       "      <td>vhigh</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>med</td>\n",
       "      <td>med</td>\n",
       "      <td>unacc</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  buying  maint doors persons lug_boot safety category\n",
       "0  vhigh  vhigh     2       2    small    low    unacc\n",
       "1  vhigh  vhigh     2       2    small    med    unacc\n",
       "2  vhigh  vhigh     2       2    small   high    unacc\n",
       "3  vhigh  vhigh     2       2      med    low    unacc\n",
       "4  vhigh  vhigh     2       2      med    med    unacc"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names = ['buying','maint','doors','persons','lug_boot','safety','category']\n",
    "\n",
    "car = pd.read_table('D:\\\\Py_dataset\\\\car.data.txt',sep = ',',names = names)\n",
    "car.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1728 entries, 0 to 1727\n",
      "Data columns (total 7 columns):\n",
      "buying      1728 non-null object\n",
      "maint       1728 non-null object\n",
      "doors       1728 non-null object\n",
      "persons     1728 non-null object\n",
      "lug_boot    1728 non-null object\n",
      "safety      1728 non-null object\n",
      "category    1728 non-null object\n",
      "dtypes: object(7)\n",
      "memory usage: 94.6+ KB\n"
     ]
    }
   ],
   "source": [
    "car.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['vhigh' 'high' 'med' 'low']\n",
      "['vhigh' 'high' 'med' 'low']\n",
      "['2' '3' '4' '5more']\n",
      "['2' '4' 'more']\n",
      "['small' 'med' 'big']\n",
      "['low' 'med' 'high']\n",
      "['unacc' 'acc' 'vgood' 'good']\n"
     ]
    }
   ],
   "source": [
    "print(car['buying'].unique())\n",
    "print(car['maint'].unique())\n",
    "print(car['doors'].unique())\n",
    "print(car['persons'].unique())\n",
    "print(car['lug_boot'].unique())\n",
    "print(car['safety'].unique())\n",
    "print(car['category'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "buying_map = {'vhigh':0, 'high':1, 'med':2, 'low':3}\n",
    "maint_map = {'vhigh':0, 'high':1, 'med':2, 'low':3}\n",
    "doors_map = {'2':2, '3':3, '4':4, '5more':5}\n",
    "persons_map = {'2':2, '4':4, 'more':5}\n",
    "lug_boot_map = {'small':0, 'med':1, 'big':2}\n",
    "safety_map = {'low':0, 'med':1, 'high':2}\n",
    "category_map = {'unacc':0, 'acc':1, 'vgood':2, 'good':3}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "car['buying'] = car['buying'].map(buying_map)\n",
    "car['maint'] = car['maint'].map(maint_map)\n",
    "car['doors'] = car['doors'].map(doors_map)\n",
    "car['persons'] = car['persons'].map(persons_map)\n",
    "car['lug_boot'] = car['lug_boot'].map(lug_boot_map)\n",
    "car['safety'] = car['safety'].map(safety_map)\n",
    "car['category'] = car['category'].map(category_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>buying</th>\n",
       "      <th>maint</th>\n",
       "      <th>doors</th>\n",
       "      <th>persons</th>\n",
       "      <th>lug_boot</th>\n",
       "      <th>safety</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   buying  maint  doors  persons  lug_boot  safety  category\n",
       "0       0      0      2        2         0       0         0\n",
       "1       0      0      2        2         0       1         0\n",
       "2       0      0      2        2         0       2         0\n",
       "3       0      0      2        2         1       0         0\n",
       "4       0      0      2        2         1       1         0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "car.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = car.drop('category',axis = 1)\n",
    "label = car['category']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### KNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8016332840435544\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_val_score,KFold\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "knn = KNeighborsClassifier()\n",
    "scores = cross_val_score(knn,train,label,cv = 5)\n",
    "\n",
    "knn_scores = scores.mean()\n",
    "print(knn_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:758: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n",
      "  \"of iterations.\", ConvergenceWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:758: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n",
      "  \"of iterations.\", ConvergenceWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:758: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n",
      "  \"of iterations.\", ConvergenceWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:758: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n",
      "  \"of iterations.\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.786271676300578\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:758: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n",
      "  \"of iterations.\", ConvergenceWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "lr = LogisticRegression(solver = 'lbfgs',multi_class = 'auto')\n",
    "scores = cross_val_score(lr,train,label,cv = 5)\n",
    "\n",
    "lr_scores = scores.mean()\n",
    "print(lr_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DecisionTree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7946262938567011\n"
     ]
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "dt = DecisionTreeClassifier()\n",
    "scores = cross_val_score(dt,train,label,cv=5)\n",
    "\n",
    "dt_scores = scores.mean()\n",
    "print(dt_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 支持向量机SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8813852668369403\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.svm import SVC\n",
    "\n",
    "svc = SVC()\n",
    "scores = cross_val_score(svc,train,label,cv=5)\n",
    "\n",
    "svc_scores = scores.mean()\n",
    "print(svc_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 集成算法-随机森林"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8224391719317113\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n",
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "RFC = RandomForestClassifier()\n",
    "scores = cross_val_score(RFC,train,label,cv=5)\n",
    "\n",
    "RFC_scores = scores.mean()\n",
    "print(RFC_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'bootstrap': True,\n",
       " 'class_weight': None,\n",
       " 'criterion': 'gini',\n",
       " 'max_depth': None,\n",
       " 'max_features': 'auto',\n",
       " 'max_leaf_nodes': None,\n",
       " 'min_impurity_decrease': 0.0,\n",
       " 'min_impurity_split': None,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 2,\n",
       " 'min_weight_fraction_leaf': 0.0,\n",
       " 'n_estimators': 'warn',\n",
       " 'n_jobs': None,\n",
       " 'oob_score': False,\n",
       " 'random_state': None,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RFC.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8264215620379083\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "\n",
    "GBDT = GradientBoostingClassifier()\n",
    "scores = cross_val_score(GBDT,train,label,cv=5)\n",
    "\n",
    "GBDT_scores = scores.mean()\n",
    "print(GBDT_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'KNN': 0.8016332840435544,\n",
       " 'LogisticRegression': 0.786271676300578,\n",
       " 'DT': 0.7946262938567011,\n",
       " 'SVC': 0.8813852668369403,\n",
       " 'RFC': 0.8224391719317113,\n",
       " 'GBDT': 0.8264215620379083}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {'KNN':knn_scores,'LogisticRegression':lr_scores,'DT':dt_scores,'SVC':svc_scores,'RFC':RFC_scores,'GBDT':GBDT_scores}\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>SVC</th>\n",
       "      <td>0.881385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GBDT</th>\n",
       "      <td>0.826422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RFC</th>\n",
       "      <td>0.822439</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>KNN</th>\n",
       "      <td>0.801633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DT</th>\n",
       "      <td>0.794626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LogisticRegression</th>\n",
       "      <td>0.786272</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      scores\n",
       "SVC                 0.881385\n",
       "GBDT                0.826422\n",
       "RFC                 0.822439\n",
       "KNN                 0.801633\n",
       "DT                  0.794626\n",
       "LogisticRegression  0.786272"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(index = data.keys(),data = data.values(),columns = ['scores']).sort_values(by = 'scores',ascending = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以上几种算法均是在默认参数的情况下，进行计算的，SVC的准确度最高，接着是GBDT,RFC参数。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 模型参数调整"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'bootstrap': True,\n",
       " 'class_weight': None,\n",
       " 'criterion': 'gini',\n",
       " 'max_depth': None,\n",
       " 'max_features': 'auto',\n",
       " 'max_leaf_nodes': None,\n",
       " 'min_impurity_decrease': 0.0,\n",
       " 'min_impurity_split': None,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 2,\n",
       " 'min_weight_fraction_leaf': 0.0,\n",
       " 'n_estimators': 'warn',\n",
       " 'n_jobs': None,\n",
       " 'oob_score': False,\n",
       " 'random_state': None,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RFC.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=3, error_score='raise-deprecating',\n",
       "       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,\n",
       "            oob_score=False, random_state=None, verbose=0,\n",
       "            warm_start=False),\n",
       "       fit_params=None, iid='warn', n_jobs=None,\n",
       "       param_grid={'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 'max_depth': [3, 4, 5, 6, 7, 8, 9]},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "       scoring=None, verbose=0)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "param_grid = {'n_estimators':[10,20,30,40,50,60,70,80,90,100],'max_depth':[3,4,5,6,7,8,9]}\n",
    "grid = GridSearchCV(RFC,param_grid = param_grid,cv = 3)\n",
    "grid.fit(train,label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=9, max_features='auto', max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, n_estimators=80, n_jobs=None,\n",
       "            oob_score=False, random_state=None, verbose=0,\n",
       "            warm_start=False)"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8478009259259259 {'max_depth': 9, 'n_estimators': 80}\n"
     ]
    }
   ],
   "source": [
    "print(grid.best_score_,grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1728 1728\n"
     ]
    }
   ],
   "source": [
    "print(len(train),len(label))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'criterion': 'friedman_mse',\n",
       " 'init': None,\n",
       " 'learning_rate': 0.1,\n",
       " 'loss': 'deviance',\n",
       " 'max_depth': 3,\n",
       " 'max_features': None,\n",
       " 'max_leaf_nodes': None,\n",
       " 'min_impurity_decrease': 0.0,\n",
       " 'min_impurity_split': None,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 2,\n",
       " 'min_weight_fraction_leaf': 0.0,\n",
       " 'n_estimators': 100,\n",
       " 'n_iter_no_change': None,\n",
       " 'presort': 'auto',\n",
       " 'random_state': None,\n",
       " 'subsample': 1.0,\n",
       " 'tol': 0.0001,\n",
       " 'validation_fraction': 0.1,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "GBDT.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=3, error_score='raise-deprecating',\n",
       "       estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n",
       "              learning_rate=0.1, loss='deviance', max_depth=3,\n",
       "              max_features=None, max_leaf_nodes=None,\n",
       "              min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "              min_samples_leaf=1, min_sampl...      subsample=1.0, tol=0.0001, validation_fraction=0.1,\n",
       "              verbose=0, warm_start=False),\n",
       "       fit_params=None, iid='warn', n_jobs=None,\n",
       "       param_grid={'learning_rate': [0.001, 0.01, 0.1, 1, 10, 100], 'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 'max_depth': [3, 4, 5, 6, 7, 8, 9]},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "       scoring=None, verbose=0)"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_grid = {'learning_rate':[0.001,0.01,0.1,1,10,100],'n_estimators':[10,20,30,40,50,60,70,80,90,100],'max_depth':[3,4,5,6,7,8,9]}\n",
    "\n",
    "grid = GridSearchCV(GBDT,param_grid = param_grid,cv = 3)\n",
    "grid.fit(train,label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8258101851851852 {'learning_rate': 10, 'max_depth': 9, 'n_estimators': 70}\n"
     ]
    }
   ],
   "source": [
    "print(grid.best_score_,grid.best_params_)   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'C': 1.0, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'auto_deprecated', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}\n"
     ]
    }
   ],
   "source": [
    "print(svc.get_params())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**对支持向量机进行参数调整**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\taon1\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=3, error_score='raise-deprecating',\n",
       "       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
       "  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
       "  kernel='rbf', max_iter=-1, probability=False, random_state=None,\n",
       "  shrinking=True, tol=0.001, verbose=False),\n",
       "       fit_params=None, iid='warn', n_jobs=None,\n",
       "       param_grid={'C': [0.01, 0.1, 1, 10, 100], 'gamma': [0.0001, 0.001, 0.01, 0.1, 1]},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "       scoring=None, verbose=0)"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_grid = {'C':[0.01,0.1,1,10,100],'gamma':[0.0001,0.001,0.01,0.1,1]}\n",
    "grid = GridSearchCV(svc,param_grid = param_grid,cv = 3)\n",
    "grid.fit(train,label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8339120370370371 {'C': 0.1, 'gamma': 0.1}\n"
     ]
    }
   ],
   "source": [
    "print(grid.best_score_,grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
